R Markdown

This is an R Markdown document. Markdown is a simple formatting syntax for authoring HTML, PDF, and MS Word documents. For more details on using R Markdown see http://rmarkdown.rstudio.com.

When you click the Knit button a document will be generated that includes both content as well as the output of any embedded R code chunks within the document. You can embed an R code chunk like this:

Figure 3A

color_opacity <- function(col,alpha=0.5){
  library(gplots)
  sapply(col, function(x){
    colorRampPalette(c("white",x))(10)[round(alpha*10)]
  }) %>% as.character
}

SplitDupData <- function(data,column,pat){
  for(cc in column){
    if(length(grep(pat,data[,cc]))>0){
      split_list <- strsplit(data[,cc],pat)
      new_index <- rep(1:nrow(data),sapply(split_list,length))
      data <- data[new_index,]
      data[,cc] <- unlist(split_list)
      row.names(data) <- NULL
    }
  }
  return(data)
}

Plot_KEGGPathway_Network <- function(Reaction,KO_stat,CPD_stat,Title=NULL,
                                     Enzyme2KO,EnzymeName,KONames,KEGGID2CPDName){
  NodeShape <- structure(c("ellipse","box","square"), 
                         names = c("Compound","Enzyme","Orthology"))
  Grey <- "#DCDCDC"
  DarkGrey <- "#A9A9A9"
  
  Reaction$Enzyme <- as.character(1:nrow(Reaction))
  Reaction$KO <- sapply(Reaction$GeneEntry,function(ec) paste(unique(unlist(Enzyme2KO[ec])),collapse = "; "))
  Reaction$KO[Reaction$KO==""] <- NA
  exists_enzyme <- Reaction$Enzyme[sapply(strsplit(Reaction$KO,"; "), function(k) 
    length(intersect(k,KO_stat$ID))>0)] %>% unique
  
  Data <- SplitDupData(data = Reaction[,-1],column = c("SubstrateName","ProductName","KO"),pat = "; ")
  Data$color <- DarkGrey
  exists_reaction <- which(Data$SubstrateName %in% CPD_stat$KEGGID +
                             Data$ProductName %in% CPD_stat$KEGGID +
                             Data$Enzyme %in% exists_enzyme==3)
  if(length(exists_reaction)>0){
    Data$color[exists_reaction] <- "black"
    ExistsReaction <- Data[exists_reaction,]
  }else{
    ExistsReaction <- NULL
  }
  
  edges <- rbind(data.frame(from=Data$SubstrateName,
                            to=Data$Enzyme,
                            arrows=ifelse(Data$ReactionType=="irreversible","none","from"),
                            color=Data$color),
                 data.frame(from=Data$Enzyme,
                            to=Data$ProductName,
                            arrows="to",
                            color=Data$color),
                 na.omit(data.frame(from=Data$Enzyme,
                                    to=Data$KO,
                                    arrows="none",
                                    color=DarkGrey))) %>% unique
  edges$dashes <- F
  edges$length <- 200
  edges$width <- 2
  edges$dashes[grep("^K",edges$to)] <- T
  edges$length[grep("^K",edges$to)] <- 50
  edges$width[grep("^K",edges$to)] <- 1
  
  AllNodes <- unique(c(edges$from,edges$to))
  IDs <- structure(1:length(AllNodes),names=AllNodes)
  edges$from <- IDs[edges$from]
  edges$to <- IDs[edges$to]
  
  cpd_stat <- subset(CPD_stat,KEGGID %in% AllNodes)
  ko_stat <- subset(KO_stat,ID %in% Data$KO)
  nodes <- data.frame(id = 1:length(AllNodes),
                      size = 20,
                      title = AllNodes,
                      label = AllNodes,
                      group = "Enzyme",
                      borderWidth = 1.5,
                      color.background = ifelse(AllNodes %in% exists_enzyme,DarkGrey,Grey),
                      font = "20px arial black",
                      row.names = AllNodes)
  
  nodes$group[grep("^[CDG]",nodes$label)] <- "Compound"
  nodes$group[grep("^K",nodes$label)] <- "Orthology"
  nodes$shape <- NodeShape[nodes$group]
  
  index <- tapply(1:nrow(nodes), nodes$group, c)
  nodes$label[index$Compound] <- KEGGID2CPDName[nodes$label[index$Compound]]
  nodes$title[index$Compound] <- paste0(nodes$title[index$Compound]," (",nodes$label[index$Compound],")")
  nodes[cpd_stat$KEGGID,"label"] <- cpd_stat$Name
  nodes$label[index$Enzyme] <- Reaction$GeneEntry[as.numeric(nodes$label[index$Enzyme])]
  nodes$title[index$Enzyme] <- EnzymeName[nodes$label[index$Enzyme]]
  nodes$title[index$Orthology] <- KONames[nodes$title[index$Orthology]]
  
  nodes$shape[nodes$group=="Compound"] <- "ellipse"
  nodes$shape[nodes$group=="Orthology"] <- "square"
  nodes$size[nodes$group=="Orthology"] <- 10
  
  nodes[c(cpd_stat$KEGGID,ko_stat$ID),"color.background"] <- c(cpd_stat$Color,ko_stat$Color)
  nodes$color.border <- nodes$color.background
  nodes[c(cpd_stat$KEGGID[cpd_stat$Sig],ko_stat$ID[ko_stat$Sig]),"color.border"] <- "black"
  
  library(visNetwork)
  html <- visNetwork(nodes = nodes,edges = edges,
                     width = 1000,height = 1000,
                     main = list(text=Title,
                                 style="font-family:arial;font-size:20px;text-align:center;")) %>%
    visIgraphLayout(randomSeed = 1, layout = "layout_with_graphopt") %>%
    visOptions(highlightNearest = list(enabled =TRUE, degree = 1)) %>%
    visInteraction(dragView = T)  %>% visLegend()
  
  
  for(G in names(NodeShape)){
    html <- html %>% visGroups(groupname = G, 
                               shape = as.character(NodeShape[G]),
                               color = Grey,
                               size = 10) 
  }
  
  return(list(html=html,ExistsReaction=ExistsReaction,KO=unique(ko_stat$ID)))
}

library(openxlsx)
library(magrittr)

load("Enzyme2KO.RData")
load("EnzymeName.RData")
load("KONames.RData")
load("KEGGID2CPDName.RData")

Reaction <- read.xlsx("Figure3_A_data1.xlsx")
KO <- read.xlsx("Figure3_A_data2.xlsx")
Meta <- read.xlsx("Figure3_A_data3.xlsx")

reList <- Plot_KEGGPathway_Network(Reaction = Reaction,
                                   KO_stat = KO,
                                   CPD_stat = Meta,
                                   Title = "ko00360: Phenylalanine metabolism",
                                   Enzyme2KO = Enzyme2KO,
                                   EnzymeName = EnzymeName,
                                   KONames = KONames,
                                   KEGGID2CPDName = KEGGID2CPDName)
reList$html

Figure 3B

Orthology_Pieplot <- function(data,K){
  data$Microbe <- factor(data$Microbe,levels = data$Microbe)
  data$Y <- sapply(1:nrow(data),function(x) 100-(sum(data$Percent[1:x])-0.5*data$Percent[x]))
  
  pie <- ggplot(data,aes(0,Percent,fill=Microbe)) + 
    theme_few() +
    geom_segment(aes(x = 0.45,y = Y,xend = 0.52,yend = Y),linewidth=0.3) +
    geom_bar(stat = "identity") +
    scale_fill_aaas() +
    scale_y_continuous(breaks = data$Y, labels = data$label) +
    theme(plot.title = element_text(size = 15,face = "bold",hjust = 0.5),
          panel.border = element_blank(),
          legend.title = element_blank(),
          legend.text = element_text(size = 12),
          axis.title = element_blank(),
          axis.text.x = element_text(size = 12),
          axis.text.y = element_blank(),
          axis.ticks.y = element_blank()) +
    coord_polar(theta = "y") +
    labs(title = K)

  return(pie)
}

library(openxlsx)
library(ggplot2)
library(ggthemes)
library(ggsci)

data <- read.xlsx("Figure3_B_data.xlsx")
data <- data[order(-data$Percent),]
small <- which(data$Percent<1)
if(length(small)>0){
  data <- rbind(data[-small,],data.frame(Microbe="Others",Percent=100-sum(data$Percent[-small])))
}
if(nrow(data)>10){
  data <- rbind(head(data,9),data.frame(Microbe="Others",Percent=100-sum(data$Percent[1:9])))
}
data$label <- paste0(round(data$Percent,2),"%")

p <- Orthology_Pieplot(data = data,K = "K00817")
print(p)

Figure 3C

LDA_Barplot <- function(res,filter=T,lda=2,color,font_family="Times"){
  library(ggplot2)
  library(ggthemes)
  
  if(filter){
    res <- res[res$`LDAScore(log10)`>lda & res$Pvalue<0.05,]
    if(nrow(res)==0) return(NULL)
  }
  
  res <- head(res,50)
  group <- intersect(names(color),unique(res$Group))
  idx <- lapply(group, function(x) which(res$Group==x)) %>% unlist
  res <- res[idx,]
  
  res$Description <- res[,ncol(res)-4]
  str_l <- na.omit(nchar(res$Description)) %>% max
  
  res$Description <- factor(res$Description,levels = rev(res$Description))
  res$log_P <- log(as.numeric(res$Pvalue),0.05)
  res$Group <- factor(res$Group,levels = group)
  res$Label <- ""
  res$Label[res$`LDAScore(log10)`>lda & res$Pvalue<0.05] <- "*"
  
  p_bar <- ggplot(res,aes(Description,`LDAScore(log10)`,fill=Group)) + 
    theme_hc(base_family = font_family) +
    scale_fill_manual(values = as.character(color[group]),guide=guide_legend(ncol = 2)) + 
    geom_bar(stat="identity",color="black",width=0.8) + 
    theme(legend.position = "top",
          legend.title = element_blank(),
          panel.grid.major.x = element_line(colour = "darkgray",linetype = "dashed"),
          panel.grid.major.y = element_blank(),
          axis.ticks.y  = element_blank(),
          axis.text.y = element_text(size=12)) +
    labs(y="LDA SCORE(log 10)",x="")
  
  p_bar <- p_bar + geom_text(aes(label = Label),size = 13,hjust = -0.2,vjust = 0.7) +
    scale_y_continuous(limits = c(0,max(res$`LDAScore(log10)`)*1.05))
  p_bar <- p_bar + coord_flip()
  
  return(p_bar)
}

LEfSe_R <- function(Data,Groups,filter=T,exclude = NULL,Outdir,Label=NULL,Color,fontfamily="Times"){
  library(microeco)
  row.names(Groups) <- Groups$ID
  feture <- Data[,Groups$ID]
  row.names(feture) <- Data$ID
  Anno <- Data[,!colnames(Data) %in% Groups$ID,drop=F]
  row.names(Anno) <- Anno$ID
  
  dataset <- microtable$new(sample_table = Groups,otu_table = feture,tax_table = Anno[,"ID",drop=F])
  lefse <- trans_diff$new(dataset = dataset, method = "lefse", group = "Groupings", boots = 100,
                          alpha = 1, p_adjust_method = "none")
  if(nrow(lefse$res_diff)==0) return(NULL)
  LDAScore <- lefse$res_diff[,c("Taxa","Group","LDA","P.unadj")]
  
  Abun <- lefse$res_abund
  Abun <- Abun[paste(Abun$Taxa,Abun$Group) %in% paste(LDAScore$Taxa,LDAScore$Group),c("Taxa","Mean")]
  LDAScore <- merge(LDAScore,Abun,by = "Taxa")
  LDAScore <- LDAScore[order(-LDAScore$LDA),]
  LDAScore$Mean <- log(LDAScore$Mean*1000000,10)
  LDAScore <- LDAScore[,c("Taxa","Mean","Group","LDA","P.unadj")]
  colnames(LDAScore)[c(2,4,5)] <- c("Abun(log10)","LDAScore(log10)","Pvalue")
  LDAScore <- cbind(Anno[LDAScore$Taxa,,drop=F],LDAScore[,-1])
  
  if(!is.null(exclude)){
    LDAScore <- subset(LDAScore,!ID %in% exclude)
  }
  
  bar <- LDA_Barplot(res = LDAScore,filter = filter,lda = 2,color = Color,font_family = fontfamily)
  
  LDAScore$`Abun(log10)` <- round(LDAScore$`Abun(log10)`, 2)
  LDAScore$`LDAScore(log10)` <- round(LDAScore$`LDAScore(log10)`,2)
  LDAScore$Pvalue <- round(LDAScore$Pvalue,3)
  
  return(list(LDAScore=LDAScore,bar=bar))
}

library(openxlsx)
library(magrittr)

Sampleinfo <- read.csv("Sampleinfo.csv")
KOData <- read.xlsx("Figure3_C_data.xlsx")
data <- read.xlsx("Figure3_B_data.xlsx")
K <- "K00817"

data <- data[order(-data$Percent),]
Microbes <- head(data$Microbe,8)

idx <- which(KOData$KO==K & KOData$Microbe %in% Microbes)
kodata <- KOData[idx,c("Microbe",Sampleinfo$Microbiome_sampleID)]
EX <- matrix(colSums(KOData[-idx,Sampleinfo$Microbiome_sampleID]),nrow = 1) %>% 
  as.data.frame %>% cbind(Microbe="MetOrigin_ex",.)
colnames(EX) <- colnames(kodata)
kodata <- rbind(kodata,EX)
colnames(kodata)[1] <- "ID"

Groups <- Sampleinfo[,c("Microbiome_sampleID","Grouping")]
colnames(Groups) <- c("ID","Groupings")

res <- LEfSe_R(Data = kodata,Groups = Groups,filter = F,exclude = "MetOrigin_ex",
               Color = structure(c("#BEBAB9","#C47070"),names=0:1))
## No taxa_abund list found. Calculate it with cal_abund function ...
## The result is stored in object$taxa_abund ...
## 9 input features ...
## 9 features are remained after removing unknown features ...
## Start Kruskal-Wallis rank sum test for Groupings ...
## 9 taxa found significant ...
## After P value adjustment, 9 taxa found significant ...
## Warning in lda.default(x, grouping, ...): variables are collinear

## Warning in lda.default(x, grouping, ...): variables are collinear

## Warning in lda.default(x, grouping, ...): variables are collinear

## Warning in lda.default(x, grouping, ...): variables are collinear

## Warning in lda.default(x, grouping, ...): variables are collinear

## Warning in lda.default(x, grouping, ...): variables are collinear

## Warning in lda.default(x, grouping, ...): variables are collinear

## Warning in lda.default(x, grouping, ...): variables are collinear

## Warning in lda.default(x, grouping, ...): variables are collinear

## Warning in lda.default(x, grouping, ...): variables are collinear

## Warning in lda.default(x, grouping, ...): variables are collinear

## Warning in lda.default(x, grouping, ...): variables are collinear

## Warning in lda.default(x, grouping, ...): variables are collinear

## Warning in lda.default(x, grouping, ...): variables are collinear

## Warning in lda.default(x, grouping, ...): variables are collinear

## Warning in lda.default(x, grouping, ...): variables are collinear

## Warning in lda.default(x, grouping, ...): variables are collinear

## Warning in lda.default(x, grouping, ...): variables are collinear

## Warning in lda.default(x, grouping, ...): variables are collinear

## Warning in lda.default(x, grouping, ...): variables are collinear

## Warning in lda.default(x, grouping, ...): variables are collinear

## Warning in lda.default(x, grouping, ...): variables are collinear

## Warning in lda.default(x, grouping, ...): variables are collinear

## Warning in lda.default(x, grouping, ...): variables are collinear

## Warning in lda.default(x, grouping, ...): variables are collinear

## Warning in lda.default(x, grouping, ...): variables are collinear

## Warning in lda.default(x, grouping, ...): variables are collinear

## Warning in lda.default(x, grouping, ...): variables are collinear

## Warning in lda.default(x, grouping, ...): variables are collinear

## Warning in lda.default(x, grouping, ...): variables are collinear

## Warning in lda.default(x, grouping, ...): variables are collinear

## Warning in lda.default(x, grouping, ...): variables are collinear

## Warning in lda.default(x, grouping, ...): variables are collinear

## Warning in lda.default(x, grouping, ...): variables are collinear

## Warning in lda.default(x, grouping, ...): variables are collinear

## Warning in lda.default(x, grouping, ...): variables are collinear

## Warning in lda.default(x, grouping, ...): variables are collinear

## Warning in lda.default(x, grouping, ...): variables are collinear

## Warning in lda.default(x, grouping, ...): variables are collinear

## Warning in lda.default(x, grouping, ...): variables are collinear

## Warning in lda.default(x, grouping, ...): variables are collinear

## Warning in lda.default(x, grouping, ...): variables are collinear

## Warning in lda.default(x, grouping, ...): variables are collinear

## Warning in lda.default(x, grouping, ...): variables are collinear

## Warning in lda.default(x, grouping, ...): variables are collinear

## Warning in lda.default(x, grouping, ...): variables are collinear

## Warning in lda.default(x, grouping, ...): variables are collinear

## Warning in lda.default(x, grouping, ...): variables are collinear

## Warning in lda.default(x, grouping, ...): variables are collinear

## Warning in lda.default(x, grouping, ...): variables are collinear

## Warning in lda.default(x, grouping, ...): variables are collinear

## Warning in lda.default(x, grouping, ...): variables are collinear

## Warning in lda.default(x, grouping, ...): variables are collinear

## Warning in lda.default(x, grouping, ...): variables are collinear

## Warning in lda.default(x, grouping, ...): variables are collinear

## Warning in lda.default(x, grouping, ...): variables are collinear

## Warning in lda.default(x, grouping, ...): variables are collinear

## Warning in lda.default(x, grouping, ...): variables are collinear

## Warning in lda.default(x, grouping, ...): variables are collinear

## Warning in lda.default(x, grouping, ...): variables are collinear

## Warning in lda.default(x, grouping, ...): variables are collinear

## Warning in lda.default(x, grouping, ...): variables are collinear

## Warning in lda.default(x, grouping, ...): variables are collinear

## Warning in lda.default(x, grouping, ...): variables are collinear

## Warning in lda.default(x, grouping, ...): variables are collinear

## Warning in lda.default(x, grouping, ...): variables are collinear

## Warning in lda.default(x, grouping, ...): variables are collinear

## Warning in lda.default(x, grouping, ...): variables are collinear

## Warning in lda.default(x, grouping, ...): variables are collinear

## Warning in lda.default(x, grouping, ...): variables are collinear

## Warning in lda.default(x, grouping, ...): variables are collinear

## Warning in lda.default(x, grouping, ...): variables are collinear

## Warning in lda.default(x, grouping, ...): variables are collinear

## Warning in lda.default(x, grouping, ...): variables are collinear

## Warning in lda.default(x, grouping, ...): variables are collinear

## Warning in lda.default(x, grouping, ...): variables are collinear

## Warning in lda.default(x, grouping, ...): variables are collinear

## Warning in lda.default(x, grouping, ...): variables are collinear

## Warning in lda.default(x, grouping, ...): variables are collinear

## Warning in lda.default(x, grouping, ...): variables are collinear

## Warning in lda.default(x, grouping, ...): variables are collinear

## Warning in lda.default(x, grouping, ...): variables are collinear

## Warning in lda.default(x, grouping, ...): variables are collinear

## Warning in lda.default(x, grouping, ...): variables are collinear

## Warning in lda.default(x, grouping, ...): variables are collinear

## Warning in lda.default(x, grouping, ...): variables are collinear

## Warning in lda.default(x, grouping, ...): variables are collinear

## Warning in lda.default(x, grouping, ...): variables are collinear

## Warning in lda.default(x, grouping, ...): variables are collinear

## Warning in lda.default(x, grouping, ...): variables are collinear

## Warning in lda.default(x, grouping, ...): variables are collinear

## Warning in lda.default(x, grouping, ...): variables are collinear

## Warning in lda.default(x, grouping, ...): variables are collinear

## Warning in lda.default(x, grouping, ...): variables are collinear

## Warning in lda.default(x, grouping, ...): variables are collinear

## Warning in lda.default(x, grouping, ...): variables are collinear

## Warning in lda.default(x, grouping, ...): variables are collinear

## Warning in lda.default(x, grouping, ...): variables are collinear

## Warning in lda.default(x, grouping, ...): variables are collinear

## Warning in lda.default(x, grouping, ...): variables are collinear
## Minimum LDA score: 1.58518566007549 maximum LDA score: 2.8647186048831
## Taxa abundance table is stored in object$res_abund ...
## lefse analysis result is stored in object$res_diff ...
res$bar

res$LDAScore
##                                                                        ID
## unclassified Bacteroides species         unclassified Bacteroides species
## Faecalibacterium prausnitzii                 Faecalibacterium prausnitzii
## unclassified Lachnospiraceae species unclassified Lachnospiraceae species
## Bacteroides ovatus CAG:22                       Bacteroides ovatus CAG:22
## Ruminococcus sp.                                         Ruminococcus sp.
## unclassified Eubacteriales species     unclassified Eubacteriales species
## unclassified Bacteroidaceae species   unclassified Bacteroidaceae species
## Anaerostipes sp.                                         Anaerostipes sp.
##                                      Abun(log10) Group LDAScore(log10) Pvalue
## unclassified Bacteroides species            3.24     0            2.86  0.002
## Faecalibacterium prausnitzii                2.16     0            1.94  0.051
## unclassified Lachnospiraceae species        2.46     1            1.82  0.304
## Bacteroides ovatus CAG:22                   1.98     0            1.79  0.051
## Ruminococcus sp.                            1.53     0            1.74  0.959
## unclassified Eubacteriales species          2.54     1            1.74  0.719
## unclassified Bacteroidaceae species         2.61     0            1.73  0.471
## Anaerostipes sp.                            1.96     0            1.59  0.837

Figure 3D

Corr_Sankey <- function(Edges,Nodes,Width=1000,Height=1000,PadHeight=15){
  id <- 0:(nrow(Nodes)-1)
  names(id) <- row.names(Nodes)
  
  Links <- Edges[,c("from","to","color","value")]
  Links$from <- id[Links$from]
  Links$to <- id[Links$to]
  
  html <-
    plot_ly(
      type = "sankey",
      orientation = "h",
      valueformat = NULL,
      valuesuffix = F,
      width = Width,
      height = Height,
      
      node = list(
        label = Nodes$label,
        color = Nodes$color,
        pad = PadHeight,
        thickness = 20,
        line = list(
          width = 0
        ),
        hoverlabel = list(
          font = list(size=16)
        )
      ),
      
      link = list(
        source = Links$from,
        target = Links$to,
        value = Links$value,
        color = Links$color
      )
    ) %>%
    layout(
      font = list(
        size = 18
      ),
      xaxis = list(showgrid = F, zeroline = F),
      yaxis = list(showgrid = F, zeroline = F)
      , margin = list(t=50,b=50,r=50)
    ) %>%
    config(
      displayModeBar = T,
      toImageButtonOptions = list(
        format = "svg",
        filename = 'sankey'
      ),
      modeBarButtons = list(list("toImage")),
      displaylogo = F)
  html
}

Mediate_Sankey <- function(micro_meta,meta_pheno,statPath="./",Level="genus",MetaFilter=NULL,
                           Colors=c("#FF0000","#008000","#FF0000","#008000"),
                           Width=1000,Height=1000,PadHeight=15){
  # NodeHeight <- 6
  # PadHeight <- 15
  RColor <- c(colorRampPalette(c(Colors[4],"white"))(101),colorRampPalette(c("white",Colors[3]))(101)[-1])
  names(RColor) <- -100:100
  
  if(!is.null(MetaFilter)){
    micro_meta <- subset(micro_meta,Metabolite %in% MetaFilter)
    meta_pheno <- subset(meta_pheno,Metabolite %in% MetaFilter)
  }
  colnames(micro_meta)[2:1] <- colnames(meta_pheno)[1:2] <- c("from","to")
  
  Edges <- rbind(micro_meta[,c("from","to","R")],
                 meta_pheno[,c("from","to","R")])
  Edges$color <- RColor[as.character(round(Edges$R*100))]
  Edges$value <- 1
  Edges <- rbind(Edges[,-3],
                 data.frame(from="hide1",to=unique(micro_meta$from),color="white",value=0.01),
                 data.frame(from=unique(meta_pheno$to),to="hide2",color="white",value=0.01))
  

  Nodes <- data.frame(label=unique(c(Edges$from,Edges$to)),color="darkgrey")
  row.names(Nodes) <- Nodes$label
  Nodes[c("hide1","hide2"),"label"] <- ""
  Nodes[c("hide1","hide2"),"color"] <- "white"
  
  varList <- list()
  for(tp in c(Level,"Metabolite","Phenotype")){
    diff_result <- read.xlsx(file.path(statPath,paste0("Diff_Table_",tp,".xlsx"))) %>%
      subset(.,Name %in% Nodes$label)
    if(nrow(diff_result)>0){
      varList[[tp]] <- diff_result$Name
      diff_result$color <- "black"
      if("FC" %in% colnames(diff_result)){
        diff_result$color[diff_result$FC>1] <- Colors[1]
        diff_result$color[diff_result$FC<1] <- Colors[2]
      }else{
        diff_result$color[diff_result$R>0] <- Colors[3]
        diff_result$color[diff_result$R<0] <- Colors[4]
      }
      Nodes[diff_result$Name,"color"] <- diff_result$color
    }
  }
  
  html <- Corr_Sankey(Edges = Edges,Nodes = Nodes,Width = Width,Height = Height,PadHeight=PadHeight)
  html
}

library(openxlsx)
library(magrittr)
library(plotly)
## 
## Attaching package: 'plotly'
## The following object is masked from 'package:ggplot2':
## 
##     last_plot
## The following object is masked from 'package:stats':
## 
##     filter
## The following object is masked from 'package:graphics':
## 
##     layout
micro_meta <- read.xlsx("Figure3_D_data1.xlsx")
meta_pheno <- read.xlsx("Figure3_D_data2.xlsx")

res <- Mediate_Sankey(micro_meta = micro_meta,meta_pheno = meta_pheno,Level =  "species",
                      MetaFilter = c("behenoyl dihydrosphingomyelin (d18:0/22:0)*",
                                     "sphingomyelin (d18:0/18:0, d19:0/17:0)*",
                                     "sphingomyelin (d18:0/20:0, d16:0/22:0)*"))
res

Figure 3E

MediatePlot <- function(re,beta){
  num_format <- function(x,cutoff=0.01){
    X <- round(x,3)
    absX <- abs(x)
    if(any(absX<cutoff,na.rm = T)) X[which(absX<cutoff)] <- format(x[which(absX<cutoff)], digits=3,scientific = TRUE)
    return(X)
  }
  
  Format <- function(x){
    X <- num_format(x)
    paste0("beta = ",X[1],", P = ",X[2])
  }
  
  pos <- data.frame(label=c(re$Microbe[1],re$Metabolite[1],re$Phenotype[1]),
                    x=c(0,0.5,1),y=c(0,sqrt(0.75),0))
  pos$xend <- pos$x[c(2,3,1)]
  pos$yend <- pos$y[c(2,3,1)]
  pos$texty <- c(-0.15,sqrt(0.75)+0.15,-0.15)
  
  w <- 0.08
  d <- sqrt((2*w)^2-w^2)
  arrow <- data.frame(x=c(d,0.5,1-d),y=c(w,sqrt(0.75)-2*w,w))
  arrow$xend <- arrow$x[c(2,3,1)]
  arrow$yend <- arrow$y[c(2,3,1)]
  
  Medi <- paste0(round(re$Estimate[3]*100),"%")
  MediP <- paste0("P[medi]==",num_format(re$`p-value`[3]))
  
  Beta <- lapply(c("Micro_Meta","Meta_Pheno","Micro_Pheno"), function(x) beta[1,paste0(c("Beta","P")," (",x,")")])
  BetaText <- sapply(Beta, Format)
  BetaColor <- ifelse(sapply(Beta, "[", 2)<0.05,"black","#676565") %>% as.character
  
  P <- c(Beta[[1]][2],Beta[[2]][2],re$`p-value`[3]) %>% as.numeric
  P[is.na(P)] <- 1
  if(all(P<0.05)){
    Color <- "#BB3E23"
  }else{
    Color <- "#676565"
  }
  
  p <- ggplot(pos,aes(x,y)) + theme_tufte() + coord_fixed(ratio = 1) +
    theme(text = element_blank(),axis.ticks = element_blank()) +
    geom_segment(aes(xend=xend,yend=yend)) +
    geom_line(data=arrow,linewidth=2,color=Color,
              arrow = arrow(length=unit(0.40,"cm"))) +
    geom_point(size = 8,color="white") +
    geom_point(size = 5,shape = 21,fill="grey") +
    annotate("text",x=0.5,y=0.38,label=Medi,color=Color,size=12)+
    annotate("text",x=0.5,y=0.26,label=MediP,color=Color,size=6,parse=T)+
    geom_text(aes(y=texty,label = label),size=5) + 
    geom_text(aes(x=c(0.2,0.8,0.5),y=c(sqrt(0.75)/2,sqrt(0.75)/2,-0.04),label=BetaText),
              color=BetaColor,angle=c(60,-60,0),size=4.5) +
    scale_x_continuous(limits = c(-0.3,1.3)) +
    scale_y_continuous(limits = c(-0.2,1.1))
  p
}

Mediation <- function(Data,VarList){
  NewName <- lapply(names(VarList), function(v) paste0(v,1:length(VarList[[v]]))) %>% unlist
  NewNameSub <- structure(NewName,names=unlist(VarList))
  NewNameSub_rev <- structure(unlist(VarList),names=NewName)
  Data <- Data[,names(NewNameSub)]
  colnames(Data) <- as.character(NewNameSub)
  
  Pairs <- lapply(NewNameSub[VarList$Metabolite], function(m){
    lapply(NewNameSub[VarList$Microbiome], function(x) data.frame(X=x,M=m,Y=NewNameSub[VarList$Phenotype])) %>% do.call("rbind",.)
  }) %>% do.call("rbind",.)
  
  ResultList <- lapply(1:nrow(Pairs), function(i){
    library(mediation)
    library(magrittr)
    X <- formula(paste(Pairs$M[i],"~",Pairs$X[i]))
    Y <- formula(paste(Pairs$Y[i],"~",Pairs$X[i],"+",Pairs$M[i]))
    model_m <- lm(X,Data)
    model_y <- lm(Y,Data)
    model_m$call$formula <- X
    model_y$call$formula <- Y
    
    set.seed(222)
    result <- try(mediate(
      model.m = model_m,
      model.y = model_y,
      treat = Pairs$X[i],
      mediator = Pairs$M[i],
      boot=T,sims=200,
      boot.ci.type = "perc"))
    if(class(result)!="mediate") return(NULL)
    summ <- summary(result)
    
    Beta <- cbind(summary(result$model.m)$coefficients[2,c(1,4),drop=F],
                  summary(result$model.y)$coefficients[3,c(1,4),drop=F],
                  summary(result$model.y)$coefficients[2,c(1,4),drop=F],
                  summary(lm(as.formula(paste(Pairs$Y[i],"~",Pairs$X[i])),Data))$coefficients[2,c(1,4),drop=F]) %>%
      as.data.frame
    colnames(Beta) <- lapply(c("Micro_Meta","Meta_Pheno","Micro_Pheno","Total_Effect"),function(x) paste0(c("Beta (","P ("),x,")")) %>% unlist
    Beta <- cbind(ID=i,Microbe=NewNameSub_rev[Pairs$X[i]],Metabolite=NewNameSub_rev[Pairs$M[i]],Phenotype=NewNameSub_rev[Pairs$Y[i]],Beta)
    
    re <- lapply(c("d","z","n"), function(x){
      summ[paste0(x,c("0","0.ci","0.p"))] %>% unlist
    }) %>% do.call(rbind,.) %>% 
      rbind(.,c(summ$tau.coef,as.numeric(summ$tau.ci),summ$tau.p))  %>% 
      as.data.frame
    colnames(re) <- c("Estimate","95% CI Lower","95% CI Upper","p-value")
    re <- cbind(Name=c("ACME","ADE","Prop. Mediated","Total Effect"),re)
    re <- cbind(ID=i,Microbe=NewNameSub_rev[Pairs$X[i]],Metabolite=NewNameSub_rev[Pairs$M[i]],Phenotype=NewNameSub_rev[Pairs$Y[i]], re)
    return(list(Beta=Beta,re=re))
  })
  
  BetaData <- lapply(ResultList, function(r) r$Beta) %>% do.call(rbind,.)
  Result <- lapply(ResultList, function(r) r$re) %>% do.call(rbind,.)
  
  PlotList <- list()
  for(i in 1:nrow(Pairs)){
    re <- subset(Result,ID==i)
    beta <- subset(BetaData,ID==i)
    
    PlotList[[i]] <- MediatePlot(re = re,beta = beta)
  }
  return(list(BetaData=BetaData,Result=Result,PlotList=PlotList))  
}


library(readxl)
library(magrittr)
library(ggplot2)
library(ggthemes)

Data <- read_xlsx("Figure3_E_data.xlsx")


VarList1 <- list(Microbiome = "Bacteroides fluxus",
                 Metabolite = "behenoyl dihydrosphingomyelin (d18:0/22:0)*",
                 Phenotype = "Weight-Zscore")

VarList2 <- list(Microbiome = "Bacteroides uniformis",
                 Metabolite = "sphingomyelin (d18:0/20:0, d16:0/22:0)*",
                 Phenotype = "ALT")

model1 <- Mediation(Data = Data,VarList = VarList1)
## Loading required package: MASS
## 
## Attaching package: 'MASS'
## The following object is masked from 'package:plotly':
## 
##     select
## Loading required package: Matrix
## Loading required package: mvtnorm
## Loading required package: sandwich
## mediation: Causal Mediation Analysis
## Version: 4.5.0
## Running nonparametric bootstrap
## Warning in data.frame(..., check.names = FALSE): row names were found from a
## short variable and have been discarded
model1$BetaData
##             ID            Microbe                                  Metabolite
## Microbiome1  1 Bacteroides fluxus behenoyl dihydrosphingomyelin (d18:0/22:0)*
##                 Phenotype Beta (Micro_Meta) P (Micro_Meta) Beta (Meta_Pheno)
## Microbiome1 Weight-Zscore         -0.520444    0.006417056         0.4579335
##             P (Meta_Pheno) Beta (Micro_Pheno) P (Micro_Pheno)
## Microbiome1       0.021436         -0.2811853       0.1432333
##             Beta (Total_Effect) P (Total_Effect)
## Microbiome1           -0.519514      0.006529272
model1$Result
##   ID            Microbe                                  Metabolite
## 1  1 Bacteroides fluxus behenoyl dihydrosphingomyelin (d18:0/22:0)*
## 2  1 Bacteroides fluxus behenoyl dihydrosphingomyelin (d18:0/22:0)*
## 3  1 Bacteroides fluxus behenoyl dihydrosphingomyelin (d18:0/22:0)*
## 4  1 Bacteroides fluxus behenoyl dihydrosphingomyelin (d18:0/22:0)*
##       Phenotype           Name   Estimate 95% CI Lower 95% CI Upper p-value
## 1 Weight-Zscore           ACME -0.2383288   -0.4278005  -0.10273386    0.00
## 2 Weight-Zscore            ADE -0.2811853   -0.5271369  -0.02085516    0.03
## 3 Weight-Zscore Prop. Mediated  0.4587533    0.1999421   0.92571343    0.00
## 4 Weight-Zscore   Total Effect -0.5195140   -0.7976302  -0.27897045    0.00
model1$PlotList[[1]]

model2 <- Mediation(Data = Data,VarList = VarList2)
## Running nonparametric bootstrap
## Warning in data.frame(..., check.names = FALSE): row names were found from a
## short variable and have been discarded
model2$BetaData
##             ID               Microbe                              Metabolite
## Microbiome1  1 Bacteroides uniformis sphingomyelin (d18:0/20:0, d16:0/22:0)*
##             Phenotype Beta (Micro_Meta) P (Micro_Meta) Beta (Meta_Pheno)
## Microbiome1       ALT        -0.3953089     0.04562832         0.4110902
##             P (Meta_Pheno) Beta (Micro_Pheno) P (Micro_Pheno)
## Microbiome1     0.04322356         -0.2130573       0.2789311
##             Beta (Total_Effect) P (Total_Effect)
## Microbiome1          -0.3755649       0.05866287
model2$Result
##   ID               Microbe                              Metabolite Phenotype
## 1  1 Bacteroides uniformis sphingomyelin (d18:0/20:0, d16:0/22:0)*       ALT
## 2  1 Bacteroides uniformis sphingomyelin (d18:0/20:0, d16:0/22:0)*       ALT
## 3  1 Bacteroides uniformis sphingomyelin (d18:0/20:0, d16:0/22:0)*       ALT
## 4  1 Bacteroides uniformis sphingomyelin (d18:0/20:0, d16:0/22:0)*       ALT
##             Name   Estimate 95% CI Lower 95% CI Upper p-value
## 1           ACME -0.1625076  -0.37515025  -0.02474767    0.02
## 2            ADE -0.2130573  -0.64146290   0.02466267    0.07
## 3 Prop. Mediated  0.4327017   0.06639403   1.08044052    0.02
## 4   Total Effect -0.3755649  -0.80101465  -0.14984779    0.00
model2$PlotList[[1]]